notebook.community

Edit and run



In [1]:

    
import pandas as pd
import pandas.rpy.common as com
import numpy as np
from sklearn.feature_extraction import DictVectorizer

%load_ext autoreload
%autoreload 2

%load_ext rmagic

%matplotlib inline

%precision 2
pd.set_option('display.precision', 3)

import ndl









    



C:\Python27\lib\site-packages\IPython\extensions\rmagic.py:693: UserWarning: The rmagic extension in IPython is deprecated in favour of rpy2.ipython. If available, that will be loaded instead.
http://rpy.sourceforge.net/
  warnings.warn("The rmagic extension in IPython is deprecated in favour of "



In [2]:

    
%%R
library(ndl)



In [3]:

    
data = com.load_data('numbers')
data['Cues'] = [x.split('_') for x in data['Cues']]
data['Number'] = data['Outcomes']
data









    Out[3]:






  
    
      
      Cues
      Outcomes
      Frequency
      Number
    
  
  
    
      1 
                       [size, shape, color, 1, exactly1]
        1
       455
        1
    
    
      2 
                    [size, shape, color, 1, 2, exactly2]
        2
       205
        2
    
    
      3 
                 [size, shape, color, 1, 2, 3, exactly3]
        3
       107
        3
    
    
      4 
              [size, shape, color, 1, 2, 3, 4, exactly4]
        4
        60
        4
    
    
      5 
           [size, shape, color, 1, 2, 3, 4, 5, exactly5]
        5
        50
        5
    
    
      6 
        [size, shape, color, 1, 2, 3, 4, 5, 6, exactly6]
        6
        36
        6
    
    
      7 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac...
        7
        21
        7
    
    
      8 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e...
        8
        20
        8
    
    
      9 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
        9
        13
        9
    
    
      10
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       10
        16
       10
    
    
      11
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       11
         3
       11
    
    
      12
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       12
         4
       12
    
    
      13
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       13
         2
       13
    
    
      14
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       14
         2
       14
    
    
      15
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       15
         4
       15
    
  

15 rows × 4 columns



In [4]:

    
def activation(W):
    return pd.DataFrame([ndl.activation(c,W) for c in data.Cues],index=data.index)

Singular / plural distinction



In [5]:

    
data['Outcomes'] = 'plural'
data['Outcomes'][1] = 'singular'
data









    Out[5]:






  
    
      
      Cues
      Outcomes
      Frequency
      Number
    
  
  
    
      1 
                       [size, shape, color, 1, exactly1]
       singular
       455
        1
    
    
      2 
                    [size, shape, color, 1, 2, exactly2]
         plural
       205
        2
    
    
      3 
                 [size, shape, color, 1, 2, 3, exactly3]
         plural
       107
        3
    
    
      4 
              [size, shape, color, 1, 2, 3, 4, exactly4]
         plural
        60
        4
    
    
      5 
           [size, shape, color, 1, 2, 3, 4, 5, exactly5]
         plural
        50
        5
    
    
      6 
        [size, shape, color, 1, 2, 3, 4, 5, 6, exactly6]
         plural
        36
        6
    
    
      7 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac...
         plural
        21
        7
    
    
      8 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e...
         plural
        20
        8
    
    
      9 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
         plural
        13
        9
    
    
      10
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
         plural
        16
       10
    
    
      11
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
         plural
         3
       11
    
    
      12
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
         plural
         4
       12
    
    
      13
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
         plural
         2
       13
    
    
      14
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
         plural
         2
       14
    
    
      15
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
         plural
         4
       15
    
  

15 rows × 4 columns



In [6]:

    
W = ndl.rw(data,M=10)
A = activation(W)
A









    Out[6]:






  
    
      
      plural
      singular
    
  
  
    
      1 
       0.09
       0.29
    
    
      2 
       0.13
       0.23
    
    
      3 
       0.16
       0.22
    
    
      4 
       0.14
       0.23
    
    
      5 
       0.14
       0.23
    
    
      6 
       0.14
       0.23
    
    
      7 
       0.14
       0.23
    
    
      8 
       0.14
       0.23
    
    
      9 
       0.14
       0.23
    
    
      10
       0.14
       0.23
    
    
      11
       0.14
       0.23
    
    
      12
       0.14
       0.23
    
    
      13
       0.14
       0.23
    
    
      14
       0.14
       0.23
    
    
      15
       0.14
       0.23
    
  

15 rows × 2 columns

With these associations, how many of the 15 items will the learner correctly label?



In [7]:

    
pd.DataFrame([data['Outcomes'], A.idxmax(1), A.idxmax(1) == data['Outcomes']], index = ['Truth', 'Prediction', 'Accurate?']).T









    Out[7]:






  
    
      
      Truth
      Prediction
      Accurate?
    
  
  
    
      1 
       singular
       singular
        True
    
    
      2 
         plural
       singular
       False
    
    
      3 
         plural
       singular
       False
    
    
      4 
         plural
       singular
       False
    
    
      5 
         plural
       singular
       False
    
    
      6 
         plural
       singular
       False
    
    
      7 
         plural
       singular
       False
    
    
      8 
         plural
       singular
       False
    
    
      9 
         plural
       singular
       False
    
    
      10
         plural
       singular
       False
    
    
      11
         plural
       singular
       False
    
    
      12
         plural
       singular
       False
    
    
      13
         plural
       singular
       False
    
    
      14
         plural
       singular
       False
    
    
      15
         plural
       singular
       False
    
  

15 rows × 3 columns



In [8]:

    
np.mean(A.idxmax(1) == data['Outcomes'])









    Out[8]:





0.07

How often are they correct (using relative item frequencies)?



In [9]:

    
float(sum(data['Frequency'] * (A.idxmax(1) == data['Outcomes']))) / float(sum(data['Frequency']))









    Out[9]:





0.46



In [10]:

    
def accuracy(data, M):
    W = ndl.rw(data, M=M)
    A = activation(W)
    return np.mean(A.idxmax(1) == data['Outcomes'])



In [11]:

    
accuracy(data, 10)









    Out[11]:





1.00

For a population of 100 learners trying to acquire the number system, what proportion are able to successfully label all 15 items given M trials?



In [12]:

    
np.mean([accuracy(data, M=10) == 1 for i in xrange(100)])









    Out[12]:





0.32



In [13]:

    
def population_accuracy(M=10, pop=100):
    return np.mean([accuracy(data, M=M) == 1 for i in xrange(pop)])



In [14]:

    
MAX_TRIALS = 500
P = {}



In [15]:

    
P['sg / pl'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]



In [16]:

    
import matplotlib.pyplot as plt

plt.plot(range(1,len(P['sg / pl'])+1), P['sg / pl'], '-', linewidth=2)
plt.title('Singular / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')









    Out[16]:





<matplotlib.text.Text at 0x1789e780>

Dual / non-dual distinction



In [17]:

    
data['Outcomes'] = 'notdual'
data['Outcomes'][2] = 'dual'
data









    Out[17]:






  
    
      
      Cues
      Outcomes
      Frequency
      Number
    
  
  
    
      1 
                       [size, shape, color, 1, exactly1]
       notdual
       455
        1
    
    
      2 
                    [size, shape, color, 1, 2, exactly2]
          dual
       205
        2
    
    
      3 
                 [size, shape, color, 1, 2, 3, exactly3]
       notdual
       107
        3
    
    
      4 
              [size, shape, color, 1, 2, 3, 4, exactly4]
       notdual
        60
        4
    
    
      5 
           [size, shape, color, 1, 2, 3, 4, 5, exactly5]
       notdual
        50
        5
    
    
      6 
        [size, shape, color, 1, 2, 3, 4, 5, 6, exactly6]
       notdual
        36
        6
    
    
      7 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac...
       notdual
        21
        7
    
    
      8 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e...
       notdual
        20
        8
    
    
      9 
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       notdual
        13
        9
    
    
      10
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       notdual
        16
       10
    
    
      11
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       notdual
         3
       11
    
    
      12
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       notdual
         4
       12
    
    
      13
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       notdual
         2
       13
    
    
      14
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       notdual
         2
       14
    
    
      15
       [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...
       notdual
         4
       15
    
  

15 rows × 4 columns



In [18]:

    
P['du / non-du'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]



In [19]:

    
plt.plot(range(1,len(P['du / non-du'])+1), P['du / non-du'], '-', linewidth=2)
plt.title('Dual / non-dual distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')









    Out[19]:





<matplotlib.text.Text at 0x178b8c88>

Singular, dual, plural



In [20]:

    
data['Outcomes'] = 'plural'
data['Outcomes'][1] = 'singular'
data['Outcomes'][2] = 'dual'



In [21]:

    
P['sg / du / pl'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]



In [22]:

    
plt.plot(range(1,len(P['sg / du / pl'])+1), P['sg / du / pl'], '-', linewidth=2)
plt.title('Singular / dual / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')









    Out[22]:





<matplotlib.text.Text at 0x17af5fd0>

Singular, dual, trial, plural



In [23]:

    
data['Outcomes'] = 'plural'
data['Outcomes'][1] = 'singular'
data['Outcomes'][2] = 'dual'
data['Outcomes'][3] = 'trial'



In [24]:

    
P['sg / du / tr / pl'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]



In [25]:

    
plt.plot(range(1,len(P['sg / du / tr / pl'])+1), P['sg / du / tr / pl'], '-', linewidth=2)
plt.title('Singular / dual / trial / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')









    Out[25]:





<matplotlib.text.Text at 0x17b12668>

Singular, dual, trial, quadral, plural



In [26]:

    
data['Outcomes'] = 'plural'
data['Outcomes'][1] = 'singular'
data['Outcomes'][2] = 'dual'
data['Outcomes'][3] = 'trial'
data['Outcomes'][4] = '4ial'



In [27]:

    
P['sg / du / tr / qu / pl'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]



In [28]:

    
plt.plot(range(1,len(P['sg / du / tr / qu / pl'])+1), P['sg / du / tr / qu / pl'], '-', linewidth=2)
plt.title('Singular / dual / trial / quadral plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')









    Out[28]:





<matplotlib.text.Text at 0x17e23c50>

A prediction is that the typology of number systems should roughly correspond to how learnable each type of number system is



In [29]:

    
for n in ('sg / pl', 'sg / du / pl', 'sg / du / tr / pl', 'du / non-du', 'sg / du / tr / qu / pl'):
    plt.plot(range(1,len(P[n])+1), P[n], '-', linewidth=1.5, label=n)
    
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
plt.xlabel('Trials')
plt.legend(loc=(-0.55,0.5))









    Out[29]:





<matplotlib.legend.Legend at 0x17e436d8>

	Cues	Outcomes	Frequency	Number
1	[size, shape, color, 1, exactly1]	1	455	1
2	[size, shape, color, 1, 2, exactly2]	2	205	2
3	[size, shape, color, 1, 2, 3, exactly3]	3	107	3
4	[size, shape, color, 1, 2, 3, 4, exactly4]	4	60	4
5	[size, shape, color, 1, 2, 3, 4, 5, exactly5]	5	50	5
6	[size, shape, color, 1, 2, 3, 4, 5, 6, exactly6]	6	36	6
7	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac...	7	21	7
8	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e...	8	20	8
9	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	9	13	9
10	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	10	16	10
11	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	11	3	11
12	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	12	4	12
13	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	13	2	13
14	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	14	2	14
15	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	15	4	15

	Cues	Outcomes	Frequency	Number
1	[size, shape, color, 1, exactly1]	singular	455	1
2	[size, shape, color, 1, 2, exactly2]	plural	205	2
3	[size, shape, color, 1, 2, 3, exactly3]	plural	107	3
4	[size, shape, color, 1, 2, 3, 4, exactly4]	plural	60	4
5	[size, shape, color, 1, 2, 3, 4, 5, exactly5]	plural	50	5
6	[size, shape, color, 1, 2, 3, 4, 5, 6, exactly6]	plural	36	6
7	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac...	plural	21	7
8	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e...	plural	20	8
9	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	plural	13	9
10	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	plural	16	10
11	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	plural	3	11
12	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	plural	4	12
13	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	plural	2	13
14	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	plural	2	14
15	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	plural	4	15

	plural	singular
1	0.09	0.29
2	0.13	0.23
3	0.16	0.22
4	0.14	0.23
5	0.14	0.23
6	0.14	0.23
7	0.14	0.23
8	0.14	0.23
9	0.14	0.23
10	0.14	0.23
11	0.14	0.23
12	0.14	0.23
13	0.14	0.23
14	0.14	0.23
15	0.14	0.23

	Truth	Prediction	Accurate?
1	singular	singular	True
2	plural	singular	False
3	plural	singular	False
4	plural	singular	False
5	plural	singular	False
6	plural	singular	False
7	plural	singular	False
8	plural	singular	False
9	plural	singular	False
10	plural	singular	False
11	plural	singular	False
12	plural	singular	False
13	plural	singular	False
14	plural	singular	False
15	plural	singular	False

	Cues	Outcomes	Frequency	Number
1	[size, shape, color, 1, exactly1]	notdual	455	1
2	[size, shape, color, 1, 2, exactly2]	dual	205	2
3	[size, shape, color, 1, 2, 3, exactly3]	notdual	107	3
4	[size, shape, color, 1, 2, 3, 4, exactly4]	notdual	60	4
5	[size, shape, color, 1, 2, 3, 4, 5, exactly5]	notdual	50	5
6	[size, shape, color, 1, 2, 3, 4, 5, 6, exactly6]	notdual	36	6
7	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac...	notdual	21	7
8	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e...	notdual	20	8
9	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	notdual	13	9
10	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	notdual	16	10
11	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	notdual	3	11
12	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	notdual	4	12
13	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	notdual	2	13
14	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	notdual	2	14
15	[size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9...	notdual	4	15